# Import all packages
%matplotlib inline
import gc
import io
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pydotplus
from IPython.display import Image
from sklearn.metrics import confusion_matrix, auc, roc_curve, roc_auc_score, classification_report
from sklearn.metrics import recall_score, precision_score, accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

try:
    # Legacy location: scikit-learn 0.23 removed its vendored ``six`` module.
    from sklearn.externals.six import StringIO
except ImportError:
    from io import StringIO
# Seed NumPy's global RNG; scikit-learn estimators created with
# random_state=None draw from it, which keeps runs repeatable.
np.random.seed(42)

# Read in data (read_csv already returns a DataFrame, no re-wrapping needed)
df = pd.read_csv('results.csv')

# Review features (rendered only when this is the last expression of a notebook cell)
df.keys()

# One-hot encode the non-numeric columns. An earlier revision label-encoded
# browser, day_of_week, campaign and traffic_source with LabelEncoder instead;
# NOTE(review): presumably dropped because integer codes imply an ordering
# between categories - confirm.
# 'converted' has to be numeric/boolean already: get_dummies would otherwise
# rename it to 'converted_<value>' and the lookups below would raise KeyError.
encode = pd.get_dummies(df)
encode.head(10)

# Feature matrix: everything except the target and three excluded columns.
# NOTE(review): the reason for excluding visiting_time, total_amount_due and
# previous_payment_amount is not visible here - confirm with the author.
X = encode.drop(['converted', 'visiting_time', 'total_amount_due', 'previous_payment_amount'], axis=1)
y = encode['converted'].values

# Hold out 33% of the rows for testing; fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0)
# Train a decision tree with default hyper-parameters (fit() returns the estimator).
DTC = DecisionTreeClassifier().fit(X_train, y_train)

# Mean accuracy on the training rows and on the held-out rows.
print('Train Accuracy Score:', DTC.score(X=X_train, y=y_train))
print('Test Accuracy Score:', DTC.score(X=X_test, y=y_test))

# Hard class predictions for the held-out rows.
y_pred = DTC.predict(X_test)

# Confusion counts, first as a raw array and then as a labelled table with totals.
confusion_matrix(y_true=y_test, y_pred=y_pred)
pd.crosstab(
    index=y_test,
    columns=y_pred,
    rownames=['Actual'],
    colnames=['Predicted'],
    margins=True,
)

# Per-class precision / recall / F1 summary.
print(classification_report(y_true=y_test, y_pred=y_pred))
# Predicted probability for the class in column 1 of predict_proba,
# i.e. DTC.classes_[1] (columns follow the order of DTC.classes_).
y_pred_proba = DTC.predict_proba(X_test)[:, 1]
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)

# Dashed diagonal = performance of a random classifier, for reference.
plt.plot([0, 1], [0, 1], 'k--')
# The model plotted here is the decision tree, so label it as such
# (the curve was previously mislabelled 'KNN').
plt.plot(fpr, tpr, label='Decision Tree')
plt.xlabel('fpr')
plt.ylabel('tpr')
plt.title('DT ROC Curve')
# Without legend() the curve label is never drawn.
plt.legend()
plt.show()

# Area under the ROC curve (rendered only as the last expression of a notebook cell).
roc_auc_score(y_test, y_pred_proba)
# Render the fitted tree to Conversion_Path.png and show it inline.
features = X.columns

# Use the standard-library StringIO as the DOT buffer; sklearn.externals.six,
# where it used to be imported from, no longer exists in scikit-learn >= 0.23.
dot_data = io.StringIO()
export_graphviz(
    DTC,
    out_file=dot_data,
    filled=True,
    rounded=True,
    special_characters=True,
    # A plain list is the most widely accepted form across scikit-learn versions.
    feature_names=list(features),
    # Names are matched to the classes in ascending order.
    # NOTE(review): assumes the lower class value means "not converted" - confirm.
    class_names=['Not Converted', 'Converted'],
)
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())

# Run Graphviz once and reuse the bytes for both the file and the inline image
# (write_png followed by create_png would lay out the whole tree twice).
png_bytes = graph.create_png()
with open('Conversion_Path.png', 'wb') as png_file:
    png_file.write(png_bytes)
Image(png_bytes)